**************************************************************************************************
*** This do file creates the replication datasets for: 											*/
*** DATE 
*** TITLE	
*** AUTHOR	
*** AUTHOR																						*/
*** AUTHOR																						*/
*** Published in: ***																			*/
**************************************************************************************************


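/* File description - "dataset_preparation_CATI.do":
- indicates the link to download all the original datasets needed
- INPUT: the folder stored in global "original" (sub-folder "data/external/" of the current directory), containing all original datasets; this file reads "ITANES_PANEL_2011_13_CATI.dta"
- OUTPUT: recoded datasets in the folder stored in global "recoded" (sub-folder "data/processed/") and "merged.dta" in current directory; this file itself saves only "dataset_idlevel_CATI.dta"
- it cleans and recodes the dataset, and keeps only the variables of interest
*/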


**************************************************************************************************


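/* Before proceeding:
1) change the "global current" path (keep the trailing slash: the sub-folder paths are appended directly to it)
2) in the selected current directory, create the sub-folders "data/external/" (global "original") and "data/processed/" (global "recoded")
3) copy all the original .dta files into the sub-folder "data/external/"
4) run
*/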

**************************************************************************************************

*** Session setup: start from an empty dataset and switch off output paging
clear
set more off

*** Current directory for data: ***CHANGE*** before running the .do file.
*** The path must end with a slash because the sub-folder globals below are appended to it.
*global current "/Users/av990/Dropbox/2016_Text_Analysis_Italian_TV/03_Analysis/ITANES"
global current "C:/Users/andre/Dropbox/Ricerca/PAPER/2016 Text Analysis Italian TV/03_Analysis/"
cd "${current}"

*** Folder with the original (input) datasets and folder for the recoded (output) datasets
global original "${current}data/external/"
global recoded "${current}data/processed/"

*** Load the CATI panel, replacing whatever is in memory
use "${original}ITANES_PANEL_2011_13_CATI.dta", clear


* Respondent identifier: rows are ordered by IDCONTATTO and numbered from 1498 onwards,
* not to generate duplicates of obs in CAWI dataset (n=1497)
sort IDCONTATTO
generate id = 1497 + _n

* Interview date of each wave (1-5), copied from Data_W1 ... Data_W5 and shown as daily dates
capture drop interview_date*
forvalues w = 1/5 {
	generate interview_date_w`w' = Data_W`w'
}
format interview_date_* %td

* Sex: A001 shifted down by one
generate sex = A001 - 1
label variable sex "[0 men; 1 women]"
tabulate sex, missing

* Age: A002 with code 999 set to missing
generate age = A002
replace age = . if age == 999
tabulate age, missing

* Education: A004 collapsed into four levels; codes outside 1-11 are left as they are
generate educ = A004
replace educ = 0 if inlist(A004, 1, 2, 3)
replace educ = 1 if inlist(A004, 4, 5, 6)
replace educ = 2 if inlist(A004, 7, 8)
replace educ = 3 if inlist(A004, 9, 10, 11)
label define educ 0 "Lower education" 1 "Professional diploma" 2 "High School degre (Liceum)" 3 "University degree or +"
label values educ educ

* Geographic area: ZONA 1-5 renumbered to 0-4, with value labels
recode ZONA (1 = 0 "NW") (2 = 1 "NE") (3 = 2 "C") (4 = 3 "S") (5 = 4 "Islands"), generate(area)

* Church attendance: Q47_W1 codes 1-5 shifted to 0-4, codes 88 and 99 set to missing
generate churchat = Q47_W1
replace churchat = churchat - 1 if inlist(Q47_W1, 1, 2, 3, 4, 5)
replace churchat = . if inlist(Q47_W1, 88, 99)
label define churchat 0 "Never" 1 "2-3/year" 2 "1/month" 3 "2-3/month" 4 "1/week"
label values churchat churchat
label variable churchat "Frequency of attendance to religious ceremonies"

* Occupation, built from Q54_W1 combined with Q53_W1 (codes 1 and 2) and from Q55_W1.
* Later assignments overwrite earlier ones, so the Q55_W1-based categories 8-11
* take precedence over the Q54_W1-based categories 1-7.
generate occup = 0
replace occup = 1 if inlist(Q54_W1, 1, 2) & Q53_W1 == 1
replace occup = 2 if inlist(Q54_W1, 1, 2, 10) & Q53_W1 == 2
replace occup = 3 if inlist(Q54_W1, 3, 4, 5, 99) & Q53_W1 == 1
replace occup = 4 if inlist(Q54_W1, 3, 4, 5, 99) & Q53_W1 == 2
replace occup = 5 if Q54_W1 == 6
replace occup = 6 if inlist(Q54_W1, 8, 11, 12, 13, 14)
* Q55_W1 == 2 is not listed for category 7: every such case ends up in category 11 below
replace occup = 7 if inlist(Q54_W1, 15, 16, 17)

replace occup = 8 if Q55_W1 == 3
replace occup = 9 if Q55_W1 == 1
replace occup = 10 if inlist(Q55_W1, 99, 10)
replace occup = 11 if Q55_W1 == 2

capture label drop occup
label define occup ///
	0 "Missing/unemployed" ///
	1 "Public manager" ///
	2 "Private manager/Professional" ///
	3 "Public employee/teacher/other public worker" ///
	4 "Private employee/other provate worker" ///
	5 "Manual worker" ///
	6 "Small autonomous worker" ///
	7 "Occasioanl worker" ///
	8 "Student" ///
	9 "Retired" ///
	10 "Other not working/NA/wealthy/not able to work" ///
	11 "Houseperson"
label values occup occup

*** Sources of political information (wave 4): primary (polinfo) and secondary (polinfo2)
* Codes 2 4 5 6 8 99 are pooled into 0, code 3 becomes 2, code 7 becomes 3; code 1 is left as 1
generate polinfo = Q37_wave4_01_W4
generate polinfo2 = Q37_wave4_02_W4
foreach v in polinfo polinfo2 {
	recode `v' (2 4 5 6 8 99 = 0) (3 = 2) (7 = 3)
	label define `v' 0 "NA/nessuna queste/cont pers/periodici/radio/amici" 1 "TV" 2 "Newspaper" 3 "Internet"
	label values `v' `v'
}
label variable polinfo "Principale fonte informazione?"
label variable polinfo2 "Secondaria fonte informazione?"


*** Favourite TV news program - Only for waves 1-3 ***
* Q39_01 codes 1-7 are kept as they are, codes 8, 9, 10 and 99 are pooled into 8 (Other / DK),
* and any other value (including missing) is left missing.
label define tg 1 "Rai 1" 2 "Rai 2" 3 "Rai 3" 4 "Rete 4" 5 "Canale 5" 6 "Italia 1" 7 "La 7" 8 "Other / DK"

forvalues w = 1/3 {
	generate tg_w`w' = Q39_01_W`w' if inlist(Q39_01_W`w', 1, 2, 3, 4, 5, 6, 7)
	replace tg_w`w' = 8 if inlist(Q39_01_W`w', 8, 9, 10, 99)
	* Attach the label defined above: the previous code attached a label named tv,
	* which is never defined, so the variables were left without value labels
	label values tg_w`w' tg
}


*** Frequency watching TV news (waves 1-3)
* Q38 shifted down by one; the shifted codes 87 and 98 (88 and 99 in Q38) are set to missing
forvalues w = 1/3 {
	generate tg_freq_w`w' = Q38_W`w' - 1
	recode tg_freq_w`w' (87 98 = .)
}



*** Propensity to vote for PD (Q36_04), PDL (Q36_06) and LN (Q36_09) ***
capture drop ptv*

* Waves 1-4: copy the three items of each wave
forvalues w = 1/4 {
	generate ptv_pd_w`w' = Q36_04_W`w'
	generate ptv_pdl_w`w' = Q36_06_W`w'
	generate ptv_ln_w`w' = Q36_09_W`w'
}

* Wave 5: no variables available, placeholders are all missing
foreach party in pd pdl ln {
	generate ptv_`party'_w5 = .
}

* Codes 88 and 99 are set to missing in every ptv_ variable
foreach v of varlist ptv_* {
	recode `v' (88 99 = .)
}

*** Vote for PDL (waves 1-3) ***
* 1 if Q48 == 6 (PDL vote intention), missing if Q48 is 88 (DN) or 99 (NA), 0 otherwise
forvalues w = 1/3 {
	capture drop vote_pdl_w`w'
	generate vote_pdl_w`w' = (Q48_W`w' == 6)
	replace vote_pdl_w`w' = . if inlist(Q48_W`w', 88, 99)
}

*** Vote for government coalition (waves 1-3) ***
* 1 if Q48 is 6 (PDL) or 9 (LN), missing if Q48 is 88 (DN) or 99 (NA), 0 otherwise
forvalues w = 1/3 {
	capture drop vote_gov_w`w'
	generate vote_gov_w`w' = inlist(Q48_W`w', 6, 9)
	replace vote_gov_w`w' = . if inlist(Q48_W`w', 88, 99)
}

*** Trust in judicial system (magistrati) ***
capture drop trust*

* Wave 1: pooling the two split-half samples.
* Q11_04_W1 (1-4 scale) is rescaled first; where the result is missing,
* Q12_04_W1 (0-10 scale) is used instead.
* NOTE(review): code 4 is mapped to 11, which lies outside a 0-10 scale - confirm the intended mapping
generate trust_mag_w1 = Q11_04_W1
recode trust_mag_w1 (2 = 3) (3 = 7) (4 = 11) (88 99 = .)
replace trust_mag_w1 = Q12_04_W1 if trust_mag_w1 == .

* Waves 2 and 3: Q12_04 is copied as it is
generate trust_mag_w2 = Q12_04_W2
generate trust_mag_w3 = Q12_04_W3

* Codes 88 and 99 are set to missing in all three waves
recode trust_mag_w1 trust_mag_w2 trust_mag_w3 (88 99 = .)


*** Best at handling most important problem [PDL government is best]
* Q14 code 1 becomes 1, codes 2 3 4 77 become 0, codes 88 and 99 become missing.
* Code 77 used to be listed a second time in the missing rule; recode applies the first
* matching rule, so 77 was always coded 0 and the duplicate entry has been removed.
* NOTE(review): confirm that 77 is meant to be 0 and not missing.
capture drop best_mip*
forvalues w = 1/3 {
	recode Q14_W`w' ///
		(2 3 4 77 = 0 "PDL gov not best at MIP") ///
		(1 = 1 "PDL gov best at MIP") ///
		(88 99 = .), generate(best_mip_w`w')
}


*** Best at handling immigration problem [PDL government is best]
* Q16_01 code 2 becomes 1, codes 1 3 4 77 become 0, codes 88 and 99 become missing.
* The value labels now mention immigration; they previously carried the MIP text
* copied from the block above.
capture drop best_immig*
forvalues w = 1/3 {
	recode Q16_01_W`w' ///
		(1 3 4 77 = 0 "PDL gov not best at immigration") ///
		(2 = 1 "PDL gov best at immigration") ///
		(88 99 = .), generate(best_immig_w`w')
}


*** Leader's evaluation: Berlusconi (Q27_04, waves 1-5)
capture drop berl*
forvalues w = 1/5 {
	generate berl_w`w' = Q27_04_W`w'
}
* Codes 77, 88 and 99 are set to missing in every wave
foreach v of varlist berl_* {
	recode `v' (77 88 99 = .)
}

*** Party Identification for PD and PDL [strength component] ***

* Categories in the directional component are changed to fit in the PTVs

* Strength component of Party Id. (Q22), waves 1-5.
* Strength codes 88 and 99 are handled according to the directional item Q21:
*   - set to 1 when Q21 holds a party code, i.e. it is not missing and not 66, 77, 88 or 99
*   - set to missing otherwise
* The previous condition joined the three inequalities on Q21 with OR, which is true for
* every observation, so all 88/99 codes became 1 and none was ever set to missing.
forvalues w = 1/5 {
	generate pid_s`w' = Q22_W`w'
	replace pid_s`w' = 1 if inlist(Q22_W`w', 88, 99) & !missing(Q21_W`w') & !inlist(Q21_W`w', 66, 77, 88, 99)
	replace pid_s`w' = . if inlist(pid_s`w', 88, 99)
}

* Party identification with PD and PDL: the strength value pid_s of the wave when the
* directional item Q21 names one of the listed parties, 0 otherwise.
* Q21 codes, as documented for wave 1:
*   PD  group:  4 Partito Democratico (Bersani), 14 SDI (Boselli),
*               15 Democratici di Sinistra, 17 La Margherita (Rutelli)
*   PDL group:  6 Popolo della Libertà (Berlusconi), 20 Forza Italia (Berlusconi)

* Waves 1-3
forvalues w = 1/3 {
	generate pid_pd_w`w' = cond(inlist(Q21_W`w', 4, 14, 15, 17), pid_s`w', 0)
	generate pid_pdl_w`w' = cond(inlist(Q21_W`w', 6, 20), pid_s`w', 0)
}

* Wave 4: only codes 4 and 14 count for PD
* NOTE(review): the PD code list differs across waves 4 and 5 - confirm against the codebook
generate pid_pd_w4 = cond(inlist(Q21_W4, 4, 14), pid_s4, 0)
generate pid_pdl_w4 = cond(inlist(Q21_W4, 6, 20), pid_s4, 0)

* Wave 5: only codes 4 and 15 count for PD
generate pid_pd_w5 = cond(inlist(Q21_W5, 4, 15), pid_s5, 0)
generate pid_pdl_w5 = cond(inlist(Q21_W5, 6, 20), pid_s5, 0)


*** Directional PID (all main parties, time-invariant from W1)
* Q21_W1 party codes grouped into six labelled categories; codes 88 and 99 become missing
recode Q21_W1 ///
	(77 = 0 "Independent") ///
	(1 2 3 5 13 = 1 "Partisan for a leftist party") ///
	(4 14 15 17 = 2 "Partisan for PD") ///
	(6 20 = 3 "Partisan for PDL") ///
	(9 11 19 21 = 4 "Partisan for rightist party") ///
	(7 8 10 12 16 18 22 66 = 5 "Other") ///
	(88 99 = .), generate(pid)


************************************************
***********  Other  variables       ************
************************************************

*** Interest in politics (waves 1-3)
* Q01 shifted down by one; the shifted codes 87 and 98 (88 and 99 in Q01) are set to missing.
* The variable label now carries the wave of the variable itself: waves 2 and 3 were
* previously labelled as W1.
forvalues w = 1/3 {
	generate polint_w`w' = Q01_W`w' - 1
	recode polint_w`w' (87 98 = .)
	label variable polint_w`w' "Interest in Politics W`w' [0 not at all 3 much]"
}


*** Left-Right self-positioning (Q34, waves 1-5); codes 88 and 99 are set to missing
forvalues w = 1/5 {
	generate lr_w`w' = Q34_W`w'
	recode lr_w`w' (88 99 = .)
	label variable lr_w`w' "Left-Right self-reported [0 left 10 right]"
}

*** LR distance from PDL's perceived position
* The Q35_01 items are cleaned in place (88 and 99 set to missing) before the
* absolute distance from the respondent's own position is computed for each wave
recode Q35_01* (88 99 = .)
forvalues w = 1/5 {
	generate lrdist_pdl_w`w' = abs(lr_w`w' - Q35_01_W`w')
}


*** LR distance from PD's perceived position
* Same steps, based on the Q35_02 items
recode Q35_02* (88 99 = .)
forvalues w = 1/5 {
	generate lrdist_pd_w`w' = abs(lr_w`w' - Q35_02_W`w')
}

*** Retrospective sociotropic economic assessment (Q02, waves 1-5)
* Q02 shifted down by one; the shifted codes 87 and 98 (88 and 99 in Q02) are set to missing
forvalues w = 1/5 {
	generate retsoc_w`w' = Q02_W`w' - 1
	recode retsoc_w`w' (87 98 = .)
	label variable retsoc_w`w' "Retro Sociot. economy [0 much worse 4 much better]"
}

* Three-point version: 0 and 1 become -1, 2 becomes 0, 3 and 4 become 1, anything else is missing
forvalues w = 1/5 {
	generate retsoc01_w`w' = cond(inlist(retsoc_w`w', 0, 1), -1, cond(retsoc_w`w' == 2, 0, cond(inlist(retsoc_w`w', 3, 4), 1, .)))
	label variable retsoc01_w`w' "Retro Sociot. economy [-1 worse; 0 same; 1 better]"
}


*** Retrospective evaluation of the government
* Wave 1: pooling the two split-half samples.
* Q04_01_W1 (1-4 scale) is rescaled to 10, 7, 3, 0; where the result is missing,
* Q04_02_W1 (0-10 scale) is used instead. Codes 88 and 99 are set to missing.
generate gov_w1 = Q04_01_W1
recode gov_w1 (1 = 10) (2 = 7) (3 = 3) (4 = 0) (88 99 = .)
replace gov_w1 = Q04_02_W1 if gov_w1 == .

* Waves 2 and 3: Q04_02 item of the same wave.
* Wave 3 previously copied the wave 2 item (Q04_02_W2), which made gov_w3 a duplicate of gov_w2.
* NOTE(review): assumes Q04_02_W3 exists in the panel file, following the W1/W2 naming - confirm
generate gov_w2 = Q04_02_W2
generate gov_w3 = Q04_02_W3

* Waves 4 and 5: Q04_wave4_03 item
generate gov_w4 = Q04_wave4_03_W4
generate gov_w5 = Q04_wave4_03_W5

* Codes 88 and 99 are set to missing, and each wave gets its own variable label
forvalues w = 1/5 {
	recode gov_w`w' (88 99 = .)
	label variable gov_w`w' "Government approval Wave `w' [0-10]"
}


*** Vote intention for PDL: 1 when the wave's vote item equals the code tested below, 0 otherwise
* (observations with a missing vote item are coded 0)

* Waves 1-3: Q48, code 6
forvalues w = 1/3 {
	generate vintent_w`w' = (Q48_W`w' == 6)
}

* Wave 4: Q48_wave4_03_W4, code 6
generate vintent_w4 = (Q48_wave4_03_W4 == 6)

* Wave 5: Q48_w5_02_03_W5, code 8
generate vintent_w5 = (Q48_w5_02_03_W5 == 8)

****************************
*** VARIABLES' CHECK********
* Summary statistics, one variable at a time, for every variable from id to vintent_w5
foreach v of varlist id-vintent_w5 {
	summarize `v'
}

* Keep only the variables located between id and vintent_w5 in the dataset, ordered by id
keep id-vintent_w5
sort id

* Marker of the source dataset: this file produces the CATI part, coded 0
generate dataset = 0
label define dataset 0 "CATI" 1 "CAWI"
label values dataset dataset

* Store the recoded dataset and empty the memory
save "${recoded}dataset_idlevel_CATI.dta", replace
clear

* Continue with dataset_preparation_CAWI.do
